{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Plate ID', 'Registration State', 'Issue Date', 'Vehicle Make',\n",
       "       'Street Name', 'Date First Observed', 'Vehicle Color'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filename = '../data/nyc-parking-violations-2020.csv'\n",
    "\n",
    "df = pd.read_csv(filename,\n",
    "                usecols=['Date First Observed', 'Plate ID', 'Registration State',\n",
    "                        'Issue Date', 'Vehicle Make', 'Street Name', 'Vehicle Color'])\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Beyond 1\n",
    "\n",
    "What were the three most commonly ticket car makes to be issued tickets on January 2nd through January 10th?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Vehicle Make\n",
       "FORD     38958\n",
       "TOYOT    37096\n",
       "HONDA    35962\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We can use a slice, but only after sorting the index from lowest to highest\n",
    "df = df.set_index('Issue Date')\n",
    "df = df.sort_index()\n",
    "df.loc['01/02/2020 12:00:00 AM':'01/10/2020 23:59:59 PM', 'Vehicle Make'].value_counts().head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Vehicle Make\n",
       "FORD     38958\n",
       "TOYOT    37096\n",
       "HONDA    35962\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "df = df.reset_index()   # undo the setting of the index from the previous cell\n",
    "\n",
    "# let's do it in chained format:\n",
    "\n",
    "(\n",
    "    df\n",
    "    .set_index('Issue Date')\n",
    "    .sort_index()\n",
    "    .loc['01/02/2020 12:00:00 AM':'01/10/2020 23:59:59 PM', \n",
    "         'Vehicle Make']\n",
    "    .value_counts()\n",
    "    .head(3)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Beyond 2\n",
    "\n",
    "How many tickets did the second-most-ticketed car get in 2020?  (And why am I not interested in the most-ticketed plate?) What state was that car from, and was it always ticketed in the same location?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Plate ID\n",
       "BLANKPLATE    8882\n",
       "2704819       1535\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Most common plate is... BLANKPLATE!\n",
    "# Second-most common is 2704819\n",
    "df = df.reset_index()\n",
    "df['Plate ID'].value_counts().head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Plate ID\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "           ..\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "2704819    IN\n",
       "Name: Registration State, Length: 1535, dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# It's from Indiana\n",
    "df = df.set_index('Plate ID')\n",
    "df.loc['2704819', 'Registration State']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Street Name\n",
       "8th Ave              395\n",
       "Penn Plz             230\n",
       "7th Ave               92\n",
       "9th Ave               63\n",
       "Broadway              57\n",
       "                    ... \n",
       "6TH AVE                1\n",
       "W 54TH ST              1\n",
       "E 39th St              1\n",
       "N/S NW C/O W 30TH      1\n",
       "E 49th St              1\n",
       "Name: count, Length: 113, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# was it always ticketed in the same place?  No, but there were a lot in the same area...\n",
    "df.loc['2704819', 'Street Name'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Beyond 3\n",
    "\n",
    "Would it be useful to set the index to \"Date First Observed\"? Why or why not?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Date First Observed\n",
       "0           12371344\n",
       "20200311         887\n",
       "20200205         795\n",
       "20200212         793\n",
       "20200310         770\n",
       "              ...   \n",
       "20220412           1\n",
       "20191131           1\n",
       "20200813           1\n",
       "20160614           1\n",
       "20201230           1\n",
       "Name: count, Length: 465, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Not very useful -- the value is set to 0 for 99% of the values!\n",
    "\n",
    "df = df.reset_index()\n",
    "df['Date First Observed'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
